Looking at wave 03 (number of seeds is now an input).

Summary

Following measures show clear differences between model and observations for reasonable proportion of runs. Can we emulate them well enough to exploit this in history matching?

All the above are in week 12, and could also consider at earlier times, however time series plot below shows that getting right in week 12 does often naturally force runs to match earlier observations, so may not need to explicitly consider at wave 1.

We could also consider age/spatial distribution of hospital admissions rather than just deaths, however as mentioned below, this data currently seems less reliable up to March 21st.

Plotting by number of seeds

ggplot(subset(CombinedData, week == 12), aes(x = log(cumH + 1), y = log(deaths + 1), col = ns)) +
  geom_point() +
  geom_hline(yintercept = c(log(250+1), log(2500+1))) +
  geom_vline(xintercept = c(log(2500+1), log(10000+1))) +
  scale_color_viridis() +
  labs(col = 'Seeds', x = 'log(hosps)', y = 'log(deaths)')

ggplot(subset(CombinedData, week == 12), aes(x = log(cumH + 1), y = log(deaths + 1), col = R0)) +
  geom_point() +
  geom_hline(yintercept = c(log(250+1), log(2500+1))) +
  geom_vline(xintercept = c(log(2500+1), log(10000+1))) +
  scale_color_viridis() +
  labs(x = 'log(hosps)', y = 'log(deaths)')

ggplot(subset(CombinedData, week == 12), aes(x = ns, y = R0, col = log(deaths + 1))) +
  geom_point(size = 2) +
  scale_color_viridis() +
  labs(col = 'log(deaths)')

Community and hospital deaths

ggplot(data = all_week12, aes(x = log(cumCD+1), y = log(cumHD+1))) + geom_point()
ggplot(data = NewDesign, aes(x = alphaI1D, y = alphaHD)) + geom_point(col = ifelse(all_week12$cumCD < 10 | all_week12$cumHD < 10, 'red', 'green'))

Total deaths

c(sum(subset(byDayDeaths, as.character(date) <= EndDate)$value), subset(phe_deaths, date == EndDate)$cumDeaths28DaysByDeathDate, subset(phe_deaths_cert, date == EndDate)$cumDailyNsoDeathsByDeathDate)
## [1] 616 550 569
ggplot(CombinedData, aes(x = week, y = log(deaths + 1), alpha = nroy, linetype = output, col = nroy)) +
  geom_line() +
  theme(legend.position = "none") +
  scale_linetype_manual(values = rep(1, 224)) +
  geom_point(data = obs_data, aes(y = median, col = NULL, alpha = NULL, linetype = NULL)) +
  geom_point(data = obs_data, aes(y = lower, col = NULL, alpha = NULL, linetype = NULL)) +
  geom_point(data = obs_data, aes(y = upper, col = NULL, alpha = NULL, linetype = NULL)) +
  labs(y = 'log(deaths)')

Instead coloured by ns:

ggplot(CombinedData, aes(x = week, y = log(deaths + 1), linetype = output, col = ns)) +
  geom_line(alpha = 0.5) +
  scale_color_viridis() +
  scale_linetype_manual(values = rep(1, 224), guide=FALSE) +
  geom_point(data = obs_data, aes(y = median, col = NULL, alpha = NULL, linetype = NULL)) +
  geom_point(data = obs_data, aes(y = lower, col = NULL, alpha = NULL, linetype = NULL)) +
  geom_point(data = obs_data, aes(y = upper, col = NULL, alpha = NULL, linetype = NULL)) +
  labs(y = 'log(deaths)')

Total admissions

ggplot(subset(CombinedData, week == 12), aes(x = log(cumH+1), y = log(deaths+1), col = nroy)) + 
  geom_point() +
  geom_hline(yintercept = c(log(250+1), log(2500+1))) +
  geom_vline(xintercept = c(log(2500+1), log(10000+1))) +
  scale_color_manual(values = c('grey', 'green')) +
  labs(x = 'log(hosps)', y = 'log(deaths)')

Age distribution

deathsByAgeTotalMW
##   age deaths  prop
## 1   4      3 0.006
## 2   5     11 0.020
## 3   6     35 0.065
## 4   7     63 0.116
## 5   8    426 0.786
ggplot(tmp_data, aes(x = age, y = log(deaths), linetype = output, col = nroy, alpha = nroy)) +
  geom_line() + 
  theme(legend.position = "none") +
  labs(y = 'log(Deaths)') +
  scale_linetype_manual(values = rep(1, 224)) +
  scale_color_manual(values = c('grey', 'green')) +
  scale_alpha_manual(values = c(0.25,1)) +
  geom_point(data = deathsByAgeTotalMW, aes(y = log(deaths), col = NULL, linetype = NULL, alpha = NULL))

ggplot(tmp_data, aes(x = age, y = prop, linetype = output, col = nroy, alpha = nroy)) +
  geom_line() + 
  theme(legend.position = "none") +
  labs(y = 'Proportion') +
  scale_linetype_manual(values = rep(1, 224)) +
  scale_color_manual(values = c('grey', 'green')) +
  scale_alpha_manual(values = c(0.25,1)) +
  ylim(0,1) +
  geom_point(data = deathsByAgeTotalMW, aes(y = deaths / sum(deathsByAgeTotal$deaths), col = NULL, linetype = NULL, alpha = NULL))

#### Some emulators ####
mogp_dir <- '/Users/jamessalter/mogp_emulator/'
setwd('~/Documents/ExeterUQ_MOGP/')
source('BuildEmulator/BuildEmulator.R')
source('~/Documents/mogp_new_wrappers.R')
source('~/Documents/mogp_basis.R')
#new.choices <- choices.default
#new.choices$lm.maxdf <- 10 # if want to change priors, add this as list in BuildNewEmulators

# Usually scale design to [-1,1]
#### THIS ISN'T RIGHT AT THE MOMENT - NEED TO USE OVERALL MAX/MIN FOR EACH PARAMETER RATHER THAN JUST IN DESIGN ####
ScaledDesign <- NewDesign
for (i in 1:14){
  ScaledDesign[,i] <- ScaledDesign[,i] - min(ScaledDesign[,i])
  ScaledDesign[,i] <- ScaledDesign[,i] / max(ScaledDesign[,i] / 2)
  ScaledDesign[,i] <- ScaledDesign[,i] - 1
}

# Join to MW data
CombinedDataScaled <- all_week_seeds %>% left_join(ScaledDesign, by = c('output' = 'output'))

# Create tData, containing training data for emulator
tData <- data.frame(subset(CombinedDataScaled, week == 12)[1:100,7:20],
                    Noise = runif(100),
                    LogDeaths = log(subset(CombinedDataScaled, week == 12)$deaths[1:100] + 1))
  
Em_LogDeaths <- BuildNewEmulators(tData = tData, HowManyEmulators = 1, meanFun="fitted")

LOO.plot(Emulators = Em_LogDeaths, which.emulator = 1,
         ParamNames = colnames(tData)[Em_LogDeaths$fitting.elements$ActiveIndices[[1]]])
summary(Em_LogDeaths$fitting.elements$lm.object[[1]]$linModel)
save_ExUQmogp(Em_LogDeaths, file = 'Em_LogDeaths')